{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>locale</th>\n",
       "      <th>birthyear</th>\n",
       "      <th>gender</th>\n",
       "      <th>joinedAt</th>\n",
       "      <th>location</th>\n",
       "      <th>timezone</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3197468391</td>\n",
       "      <td>id_ID</td>\n",
       "      <td>1993</td>\n",
       "      <td>male</td>\n",
       "      <td>2012-10-02T06:40:55.524Z</td>\n",
       "      <td>Medan  Indonesia</td>\n",
       "      <td>480.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3537982273</td>\n",
       "      <td>id_ID</td>\n",
       "      <td>1992</td>\n",
       "      <td>male</td>\n",
       "      <td>2012-09-29T18:03:12.111Z</td>\n",
       "      <td>Medan  Indonesia</td>\n",
       "      <td>420.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>823183725</td>\n",
       "      <td>en_US</td>\n",
       "      <td>1975</td>\n",
       "      <td>male</td>\n",
       "      <td>2012-10-06T03:14:07.149Z</td>\n",
       "      <td>Stratford  Ontario</td>\n",
       "      <td>-240.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1872223848</td>\n",
       "      <td>en_US</td>\n",
       "      <td>1991</td>\n",
       "      <td>female</td>\n",
       "      <td>2012-11-04T08:59:43.783Z</td>\n",
       "      <td>Tehran  Iran</td>\n",
       "      <td>210.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3429017717</td>\n",
       "      <td>id_ID</td>\n",
       "      <td>1995</td>\n",
       "      <td>female</td>\n",
       "      <td>2012-09-10T16:06:53.132Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>420.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      user_id locale birthyear  gender                  joinedAt  \\\n",
       "0  3197468391  id_ID      1993    male  2012-10-02T06:40:55.524Z   \n",
       "1  3537982273  id_ID      1992    male  2012-09-29T18:03:12.111Z   \n",
       "2   823183725  en_US      1975    male  2012-10-06T03:14:07.149Z   \n",
       "3  1872223848  en_US      1991  female  2012-11-04T08:59:43.783Z   \n",
       "4  3429017717  id_ID      1995  female  2012-09-10T16:06:53.132Z   \n",
       "\n",
       "             location  timezone  \n",
       "0    Medan  Indonesia     480.0  \n",
       "1    Medan  Indonesia     420.0  \n",
       "2  Stratford  Ontario    -240.0  \n",
       "3        Tehran  Iran     210.0  \n",
       "4                 NaN     420.0  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('users.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "38209"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_records = df.shape[0]\n",
    "n_records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 38209 entries, 0 to 38208\n",
      "Data columns (total 7 columns):\n",
      "user_id      38209 non-null int64\n",
      "locale       38209 non-null object\n",
      "birthyear    38209 non-null object\n",
      "gender       38100 non-null object\n",
      "joinedAt     38152 non-null object\n",
      "location     32745 non-null object\n",
      "timezone     37773 non-null float64\n",
      "dtypes: float64(1), int64(1), object(5)\n",
      "memory usage: 2.0+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "38209"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_uniqueUsers():\n",
    "    uniqueUsers = set()\n",
    "    for i in range(n_records):\n",
    "        uniqueUsers.add(df.loc[i, 'user_id'])\n",
    "        \n",
    "    n_events = len(uniqueUsers)\n",
    "    return n_events\n",
    "\n",
    "n_users = get_uniqueUsers()\n",
    "n_users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>locale</th>\n",
       "      <th>birthyear</th>\n",
       "      <th>gender</th>\n",
       "      <th>joinedAt</th>\n",
       "      <th>timezone</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>id_ID</td>\n",
       "      <td>1993</td>\n",
       "      <td>male</td>\n",
       "      <td>2012-10-02T06:40:55.524Z</td>\n",
       "      <td>480.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>id_ID</td>\n",
       "      <td>1992</td>\n",
       "      <td>male</td>\n",
       "      <td>2012-09-29T18:03:12.111Z</td>\n",
       "      <td>420.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>en_US</td>\n",
       "      <td>1975</td>\n",
       "      <td>male</td>\n",
       "      <td>2012-10-06T03:14:07.149Z</td>\n",
       "      <td>-240.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>en_US</td>\n",
       "      <td>1991</td>\n",
       "      <td>female</td>\n",
       "      <td>2012-11-04T08:59:43.783Z</td>\n",
       "      <td>210.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>id_ID</td>\n",
       "      <td>1995</td>\n",
       "      <td>female</td>\n",
       "      <td>2012-09-10T16:06:53.132Z</td>\n",
       "      <td>420.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  locale birthyear  gender                  joinedAt  timezone\n",
       "0  id_ID      1993    male  2012-10-02T06:40:55.524Z     480.0\n",
       "1  id_ID      1992    male  2012-09-29T18:03:12.111Z     420.0\n",
       "2  en_US      1975    male  2012-10-06T03:14:07.149Z    -240.0\n",
       "3  en_US      1991  female  2012-11-04T08:59:43.783Z     210.0\n",
       "4  id_ID      1995  female  2012-09-10T16:06:53.132Z     420.0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = df.drop(['user_id'], axis=1)\n",
    "df = df.drop(['location'], axis=1)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import hashlib\n",
    "import locale\n",
    "\n",
    "from collections import defaultdict\n",
    "from sklearn.preprocessing import normalize\n",
    "\n",
    "class FeatureEng:\n",
    "    def __init__(self):\n",
    "        self.localeIdMap = defaultdict(int)\n",
    "        for i, l in enumerate(locale.locale_alias.keys()):\n",
    "          self.localeIdMap[l] = i + 1\n",
    "        \n",
    "        self.genderIdMap = defaultdict(int, {'NaN': 0, \"male\":1, \"female\":2})\n",
    "  \n",
    "    def getLocaleId(self, locstr):\n",
    "        return self.localeIdMap[locstr.lower()]\n",
    "\n",
    "    def getGenderId(self, genderStr):\n",
    "        return self.genderIdMap[genderStr]\n",
    "\n",
    "    def getJoinedYearMonth(self, dateString):\n",
    "        try:\n",
    "            dttm = datetime.datetime.strptime(dateString, \"%Y-%m-%dT%H:%M:%S.%fZ\")\n",
    "            return (dttm.year-2010)*12 + dttm.month\n",
    "        except:  \n",
    "          return 0\n",
    "\n",
    "    def getBirthYearInt(self, birthYear):\n",
    "        try:\n",
    "          return 0 if birthYear == \"None\" else int(birthYear)\n",
    "        except:\n",
    "          return 0\n",
    "\n",
    "    def getTimezoneInt(self, timezone):\n",
    "        try:\n",
    "          return int(timezone)\n",
    "        except: \n",
    "          return 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "FE = FeatureEng()\n",
    "\n",
    "cols = ['LocaleId', 'BirthYearInt', 'GenderId', 'JoinedYearMonth', 'TimezoneInt']\n",
    "n_cols = len(cols)\n",
    "userMatrix = np.zeros((df.shape[0],n_cols), dtype=np.int)\n",
    "\n",
    "for i in range(df.shape[0]): \n",
    "    userMatrix[i, 0] = FE.getLocaleId(df.loc[i,'locale'])\n",
    "    userMatrix[i, 1] = FE.getBirthYearInt(df.loc[i,'birthyear'])\n",
    "    userMatrix[i, 2] = FE.getGenderId(df.loc[i,'gender'])\n",
    "    userMatrix[i, 3] = FE.getJoinedYearMonth(df.loc[i,'joinedAt'])\n",
    "    userMatrix[i, 4] = FE.getTimezoneInt(df.loc[i,'timezone'])\n",
    "\n",
    "userMatrix = normalize(userMatrix, norm=\"l1\", axis=0, copy=False)\n",
    "\n",
    "df_FE = pd.DataFrame(data=userMatrix, columns=cols) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>LocaleId</th>\n",
       "      <th>BirthYearInt</th>\n",
       "      <th>GenderId</th>\n",
       "      <th>JoinedYearMonth</th>\n",
       "      <th>TimezoneInt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000036</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.000030</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.000030</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   LocaleId  BirthYearInt  GenderId  JoinedYearMonth  TimezoneInt\n",
       "0  0.000027      0.000027  0.000019         0.000026     0.000036\n",
       "1  0.000027      0.000027  0.000019         0.000026     0.000031\n",
       "2  0.000030      0.000027  0.000019         0.000026    -0.000018\n",
       "3  0.000030      0.000027  0.000038         0.000027     0.000016\n",
       "4  0.000027      0.000027  0.000038         0.000026     0.000031"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_FE.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.cluster import MiniBatchKMeans\n",
    "import time\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def K_cluster_analysis(K, X_train):\n",
    "    start = time.time()\n",
    "    print(\"K-means begin with clusters: {}\".format(K));\n",
    "    mb_kmeans = MiniBatchKMeans(n_clusters = K)\n",
    "    mb_kmeans.fit(X_train)\n",
    "    CH_score = metrics.silhouette_score(X_train,mb_kmeans.predict(X_train))\n",
    "\n",
    "    end = time.time()\n",
    "    print(\"CH_score: {}, time elaps:{}\".format(CH_score, int(end-start)))\n",
    "    \n",
    "    return CH_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters: 20\n",
      "CH_score: 0.5568428384531388, time elaps:91\n",
      "K-means begin with clusters: 40\n",
      "CH_score: 0.5808028104311543, time elaps:88\n",
      "K-means begin with clusters: 80\n",
      "CH_score: 0.47785078285291965, time elaps:77\n"
     ]
    }
   ],
   "source": [
    "Ks = [20, 40, 80]\n",
    "CH_scores = []\n",
    "\n",
    "for K in Ks:\n",
    "    ch = K_cluster_analysis(K, df_FE)\n",
    "    CH_scores.append(ch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x7ff14bb7f470>]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAIABJREFUeJzt3Xu8VmP+//HXp127VA5RX5MOysj4YYjZwgydU4xK45Q0ZJAZwpjBlBmnGufKYYrRpJwrTAiRdBAzDhUyKpEG1TREGMcO+vz++NyZe7bY927fe6/78H4+HvvRXutea+/PenTv9177uq51XebuiIhIcaiVdAEiIlJzFPoiIkVEoS8iUkQU+iIiRUShLyJSRBT6IiJFRKEvIlJEFPoiIkVEoS8iUkRqJ11AeY0bN/ZWrVolXYaISF6ZP3/+++7epKLjci70W7Vqxbx585IuQ0Qkr5jZ25kcp+YdEZEiotAXESkiCn0RkSKi0BcRKSIKfRGRIqLQFxEpIhmFvpn1MLMlZrbUzAZv5vUBZrbazF5OfZya9to1ZrbQzBab2Y1mZtm8ABERyVyF4/TNrAQYDXQDVgBzzWyKuy8qd+gkdx9U7twfAz8B9k7tegboAMyuYt2SBzZuhMceg7Vr4Wc/S7oaEYHM7vTbAUvdfZm7rwMmAr0z/PoO1ANKgbpAHeDdLSlU8seXX8LYsbDnnnDEEXDUUXDNNUlXJSKQWeg3A5anba9I7SvvKDN7xczuN7MWAO7+LDALWJX6mObui6tYs+SoDz6AP/4Rdt4ZTjsN6teHe+6B44+H3/0Ohg1LukIRydY0DA8DE9x9rZmdDtwOdDazXYH/BzRPHTfdzA5x96fTTzazgcBAgJYtW2apJKkpb74J110H48bBF1/A4YfDeedBx45gBsceC3XqwMUXw7p1MHRo7BeRmpdJ6K8EWqRtN0/t+5q7f5C2ORbY9Md8H+A5d/8UwMweAw4Cni53/hhgDEBZWZlXon5J0HPPwfDhMHlyhHr//vCb30SzTrqSEhg/HkpL4y+BtWvh6qsV/CJJyKR5Zy7Qxsxam1kp0BeYkn6AmTVN2+wFbGrCeQfoYGa1zawO0Ymr5p08tnEjPPggHHwwHHQQzJwJQ4bAW2/Brbd+M/A3qVULbrkFzjgDrr0Wzj0XXL/eRWpchXf67r7BzAYB04ASYJy7LzSzocA8d58CnG1mvYANwBpgQOr0+4HOwD+ITt3H3f3h7F+GVLcvvoA77oARI+CNN6BVK7jxRjj5ZGjYMLOvUasWjBoVd/zXXx9NPaNGxX4RqRkZtem7+1Rgarl9F6d9PgQYspnzvgJOr2KNkqDVq+GmmyKc338f9t8f7r0X+vSB2lvQI2QGI0dC3brRxLNuXfwFUFKS/dpF5Jtybj59yQ2vvx6ds7fdFkMwe/aE88+PZp2qtsWbwZVXRvAPHRrBP27clv0SEZHK0Y+ZfM0d/v736Jx96KFohjnxxOic3X337H4vM7jssvgef/hDBP+dd0aHsIhUH4W+8NVX0Tk7fHiMyNl++wjiM8+EHXes3u/9+9/HHf/558P69TBhQvwiEJHqodAvYp99Fs03110XY+2//30YPRpOOgkaNKi5Os47L4L+nHPg6KPhvvviF4GIZJ9Cvwi9+26E++jRsGYNHHhgTJPQu3dyHapnnx3B/6tfRR0PPABbbZVMLSKFTKFfRF57LUbO3HFHtKEfeWTcZf/4x0lXFn75ywj+U0+NOXumTKnZvzhEioFCv8C5w9NPR3v9ww9DvXoxtv7cc2G33ZKu7pt+8YsI/pNOgsMOg0cfha23TroqkcKh0C9QGzbE9AjDh8PcudC4MVx6aTwR26RJ0tV9t/79YxTPCSdA9+4xPfO22yZdlUhhUOgXmE8/jTHv110XUyO0aQN//nMMvcynNvLjjos7/uOOg27dYNo0aNQo6apE8p8egC8Qq1bBhRdCixYxCqZ58xiG+dprcPrp+RX4m/TpE3+tLFgAnTvHE8EiUjUK/Ty3cCGcckrMhXPVVdClCzz7bLTj9+6d//PabOrQfe016NQpRh6JyJbL80goTu4waxb89Kew117xQNNpp8VEaPffH0MwC0n37tGhu2xZzNG/alXSFYnkL4V+Htn0xGpZWTR3zJsXq1EtXx4Ton3/+0lXWH06d4bHH4cVK6BDh/hXRCpPoZ8HPvkkOmZ33RX69YPPP4e//AXefjumS9hhh6QrrBmHHAJPPBFNPO3bR0e1iFSOQj+HrVwZa8u2aBGTnrVqFWPtFy6MB5jq1Uu6wpp30EHw5JPw4Ydxx//mm0lXJJJfFPo56B//gAEDoHXrGGffvTs8/zw89VR0bOZ752xV7b9/9Gl89lnc8S9ZknRFIvmjyOMjd7jHHWyPHrD33tEhe8YZsHQpTJoE7dolXWFuadsWZs+Oh9A6dIBFi5KuSCQ/KPQTtn493HUX7LtvPIS0YAFccQW8804sKdi6ddIV5q699oq/fmrVilE9r7ySdEUiuU+hn5CPP46mm112gZ//PMJ/3LjonBwyJOa0l4rtvnsEf926MY7/xReTrkgktyn0a9jy5TGzZYsWsXDIbrvB1Knw6qsxEZrmka+8Nm1gzpyYmK1z5+j/EJHNU+jXkJdeionEdtklmm169oT582HGjJhNsqrrzha71q0j+Bs3jmayZ55JuiKR3KTQr0buMVFY166w336x7uxZZ8WTpXffHfske1q2jKaenXaKDvHZs5OuSCT3KPSrwbp1cPvtMQqnRw9YvBiuvjqadkaOjHCS6tGsWYT9zjvD4YfHiCgR+S+FfhZ99FGEe+vWMc7eLML/n/+ECy6A7bZLusLi8L3vRfC3aRPPNUydmnRFIrlDoZ8Fb78dK1G1aAGDB8Oee0azzoIFMY99aWnSFRafJk1g5sz4vzjyyGhaExGFfpXMnw/HHx8TnY0aFeHy8ssxP8yhh6pzNmk77BAd5fvtB0cfHQ+8iRQ7hX4lbdwY0/x26hSzXT76aNzlL1sGd94J++yTdIWSbrvt4pfwAQdA375wzz1JVySSLC2XmKG1a2PEzYgR8ch/8+bxcNWpp2r91ly3zTYxLXPPnjFsdv36WHhdpBgp9CuwZk2sMXvjjTGlb9u2MW3CscfG4t2SHxo2jL/KjjwyHoJbty4WnhEpNgr9b7FsWTxEdeutMX99jx7xJG3nzmqrz1f168fSi0cdBQMHRvCfeWbSVYnUrIza9M2sh5ktMbOlZjZ4M68PMLPVZvZy6uPUtNdamtkTZrbYzBaZWavslZ99L7wQd/Ft2sQd/rHHxkRejz0W688q8PNbvXqx2Hrv3jBoUCxOI1JMKrzTN7MSYDTQDVgBzDWzKe5efjLbSe4+aDNf4g7gcnefbmYNgY1VLTrbNm6ERx6JNvqnn442+gsuiKdnd9op6eok2+rWhfvugxNOiMVp1q6NobYixSCT5p12wFJ3XwZgZhOB3kCFM5ib2R5AbXefDuDun1ah1qz74osYcTNyZCzE0bJl3PmdckpM3iWFq06dGMlTp07MarpuHVx0kf6Sk8KXSeg3A5anba8ADtjMcUeZWXvgdeBcd18O7AZ8ZGaTgdbAk8Bgd/8q/UQzGwgMBGhZA3MUvP8+3Hwz/OlPsHp1jOOeMCHGctdWL0fRqF0b7rgjHp675JK44//jHxX8UtiyFXEPAxPcfa2ZnQ7cDnROff1DgH2Bd4BJwADg1vST3X0MMAagrKzMs1TTNyxdGnfy48fHXf5Pfxqdsx066Ae9WJWURGd9aWksXrN2LVx7rd4PUrgyCf2VQIu07eapfV9z9w/SNscC16Q+XwG8nNY09CBwIOVCv7o9+2y01z/wQPw5//OfR1vuHnvUZBWSq2rVik770tJ4DmPdOrjhBgW/FKZMQn8u0MbMWhNh3xfol36AmTV191WpzV7A4rRztzOzJu6+mrj7n5eVyivw1VcxPG/4cPj736FRo2i7PeusmJBLJJ1ZPItRWhp9POvWwU03aRF6KTwVhr67bzCzQcA0oAQY5+4LzWwoMM/dpwBnm1kvYAOwhmjCwd2/MrPzgBlmZsB84C/Vcynh889jZsuRI6M5p3Xr+GE++eR4QEfk25jFTULdunDllRH8f/lLNAGJFIqM2vTdfSowtdy+i9M+HwIM+ZZzpwN7V6HGjHz0UTxMNXp0dNTuvz/cey/06aPOWcmcGVx+eQT/pZdG8N92m95DUjgK5q28fn10wHXtGp2zBx+sNlnZMmYxmqe0FC68MN5bd92laTekMBRM6DdpEvPaN26cdCVSKIYMieA/77wI/okTtTaC5L+C6qZS4Eu2/fa30Sf0wAPws5/Bl18mXZFI1RRU6ItUh7POgltuiVk6e/eOwQIi+UqhL5KBgQNh3DiYPj3W3f3ss6QrEtkyCn2RDJ18cszV9NRTMdX2J58kXZFI5Sn0RSrhhBOiQ/fZZ2Md5I8+SroikcpR6ItU0jHHxCLr8+fHEOE1a5KuSCRzCn2RLXDkkTGi59VXYzW11auTrkgkMwp9kS3005/G/E5LlkCnTvDvfyddkUjFFPoiVXDooTB1Kvzzn9CxI/zrX0lXJPLdFPoiVdSpEzz+OKxcGWszLF9e8TkiSVHoi2TBIYfEGP7Vq6F9+7jzF8lFCn2RLDnwQJgxAz7+OO74ly5NuiKRb1Loi2TRj34Es2bFcpzt28NrryVdkcj/UuiLZNk++8Ds2bBxY3Tuvvpq0hWJ/JdCX6Qa7LlnBH+tWtHRu2BB0hWJBIW+SDXZfXeYMwe22iqCf16NrA4t8t0U+iLVaNddI/i33Ra6dIHnnku6Iil2Cn2RataqVQR/kybQrRs880zSFUkxU+iL1IAWLSL4mzWD7t1jhI9IEhT6IjVkp51iLv7WreHww+GJJ5KuSIqRQl+kBu24Y9zl/+AH0LNnLMEoUpMU+iI1rEkTmDkT9t4b+vSJKZpFaopCXyQB228PTz4ZT/Aecwzce2/SFUmxUOiLJGTbbaNd/6CD4Pjj4e67k65IioFCXyRBW28d0zJ36AA//zmMH590RVLoFPoiCWvQAB55JMbw/+IXcMstSVckhUyhL5ID6teHhx6KJRh/+Uv405+SrkgKVUahb2Y9zGyJmS01s8GbeX2Ama02s5dTH6eWe30bM1thZqOyVbhIoalXDyZPjhE9Z58NI0YkXZEUotoVHWBmJcBooBuwAphrZlPcfVG5Qye5+6Bv+TLDgDlVqlSkCJSWwqRJ0L8/nHcerF0LF16YdFVSSCoMfaAdsNTdlwGY2USgN1A+9DfLzH4E7Ag8DpRtYZ0iRaNOnRjJU1oKv/89rFsHl1wCZklXJoUgk9BvBqQv9bwCOGAzxx1lZu2B14Fz3X25mdUCRgD9ga5VLVakWNSuDbfdFr8ALrss7vivuELBL1WXSehn4mFggruvNbPTgduBzsAZwFR3X2Hf8W41s4HAQICWLVtmqSSR/FZSAmPHxh3/VVfFHf/w4Qp+qZpMQn8l0CJtu3lq39fc/YO0zbHANanPDwIOMbMzgIZAqZl96u6Dy50/BhgDUFZW5pW6ApECVqsW3HxzBP/IkRH8N9wQ+0W2RCahPxdoY2atibDvC/RLP8DMmrr7qtRmL2AxgLufkHbMAKCsfOCLyHczi6CvWzfu9NeuhT//WcEvW6bC0Hf3DWY2CJgGlADj3H2hmQ0F5rn7FOBsM+sFbADWAAOqsWaRomMG11wTwX/55XHHf+ut0QQkUhnmnlutKWVlZT5Pi4mKfKthw+Dii6FfP7j99uj0FTGz+e5e4QhJvV1E8sxFF8WoniFD4o7/nntiWyQTCn2RPDR4cDT1/OY3sH59PNBVt27SVUk+UFeQSJ4691wYNSrm7PnZz+DLL5OuSPKBQl8kj515JowZA489Fssvfv550hVJrlPoi+S5006LefhnzIhZOj/9NOmKJJcp9EUKwEknwV13wdNPQ48e8J//JF2R5CqFvkiB6NcPJk6E55+HQw+Fjz5KuiLJRQp9kQJy9NFw//3w4ovQpQt88EHF50hxUeiLFJjevWNEz8KF0LkzvPde0hVJLlHoixSgww6LdXffeAM6dYJ//zvpiiRXKPRFClTXrjB1Krz9NnToACtXVnyOFD6FvkgB69gRpk2DVasi+N95J+mKJGkKfZEC95OfwPTp8P770L49LFuWdEWSJIW+SBE44ACYORM++STu+N94I+mKJCkKfZEisd9+MGtWzNHToQMsXpx0RZIEhb5IEdl7b5g9GzZujPb+V19NuiKpaQp9kSKz557w1FOx+ErHjvDSS0lXJDVJoS9ShH7wA5gzBxo0iAe45s5NuiKpKQp9kSL1/e/HHX+jRjGm/9lnk65IaoJCX6SItWoVwb/jjjFJ25w5SVck1U2hL1LkWrSI4G/ePKZvmDEj6YqkOin0RYSmTWNUzy67wBFHxFO8UpgU+iICRBPPrFmw++7Qqxc8/HDSFUl1UOiLyNcaN44nd/fZJxZbnzw56Yok2xT6IvI/GjWKuXr23x+OPRYmTUq6Iskmhb6IfMO220a7/k9+Essw3nln0hVJtij0RWSztt465uPv2DEWXh83LumKJBsU+iLyrRo0iBW4uneHU06Bm29OuiKpKoW+iHynrbaCBx+Enj3hjDPghhuSrkiqIqPQN7MeZrbEzJaa2eDNvD7AzFab2cupj1NT+9ua2bNmttDMXjGz47J9ASJS/erWhfvvjxE9v/41XHtt0hXJlqpd0QFmVgKMBroBK4C5ZjbF3ReVO3SSuw8qt+9z4ER3f8PMdgLmm9k0d/8oG8WLSM0pLYWJE+HEE+GCC2DtWvjDH5KuSiqrwtAH2gFL3X0ZgJlNBHoD5UP/G9z99bTP/2Vm7wFNAIW+SB6qUwfuuiv+vegiWLcOLrsMzJKuTDKVSeg3A5anba8ADtjMcUeZWXvgdeBcd08/BzNrB5QCb25hrSKSA0pKYPz4uPMfNiyC/8orFfz5IpPQz8TDwAR3X2tmpwO3A503vWhmTYE7gZPcfWP5k81sIDAQoGXLllkqSUSqS0kJjBkTwX/11dHUM3Kkgj8fZBL6K4EWadvNU/u+5u4fpG2OBa7ZtGFm2wCPAr939+c29w3cfQwwBqCsrMwzqlxEElWrFoweHcF//fVxx/+nP8V+yV2ZhP5coI2ZtSbCvi/QL/0AM2vq7qtSm72Axan9pcADwB3ufn/WqhaRnGAG110Xo3uuuSaC/5ZbFPy5rMLQd/cNZjYImAaUAOPcfaGZDQXmufsU4Gwz6wVsANYAA1KnHwu0B3Yws037Brj7y9m9DBFJihlcdVUE/6Y2/nHjoglIco+551ZrSllZmc+bNy/pMkRkCwwbBhdfDH37xnw9tbPVaygVMrP57l5W0XH6LxGRrLnoorjj/93vYP16uOeeaPOX3KGWNxHJqgsuiHb+v/4Vjj46RvZI7lDoi0jW/frXcNNNsfrWkUfCF18kXZFsotAXkWrxq1/B2LExL3/PnvDZZ0lXJKDQF5FqdMopcNttsfbu4YfDJ58kXZEo9EWkWp14Itx9N/ztb9CjB3z8cdIVFTeFvohUu759Y63dF16Abt3gww+Trqh4KfRFpEYcdRRMngwLFkCXLvD++0lXVJwU+iJSY3r2hIcegkWLoHNneO+9pCsqPgp9EalRPXrAo4/C0qWx6PqqVRWeIlmk0BeRGtelCzz2GLzzDnToACtWJF1R8VDoi0giOnSAJ56Ad9+Nz99+O+mKioNCX0QS8+Mfw5NPwpo10L49vKl19aqdQl9EErX//jBzJnz6adzxv/56xefIllPoi0ji9t0XZs+Oufg7dIjRPVI9FPoikhN++MMIfohRPa+8kmQ1hUuhLyI5Y4894KmnYg7+Tp3gxReTrqjwKPRFJKfsthvMmQMNG8bQzhdeSLqiwqLQF5Gcs8suEfzbbw9du8ZkbZIdCn0RyUk77xxNPU2bQvfu8blUnUJfRHJW8+bRubvzznDYYTGmX6pGoS8iOa1p01iEZddd4YgjYvoG2XIKfRHJef/3fxH8e+wRa+5OmZJ0RflLoS8ieWGHHWDGDGjbNubm/+tfk64oPyn0RSRvNGoE06dDu3Zw3HEwYULSFeUfhb6I5JVttoFp0+Dgg6F/f7j99qQryi8KfRHJOw0bwtSpsfrWySfD2LFJV5Q/FPoikpfq148O3e7d4bTT4Kabkq4oPyj0RSRvbbUVPPgg9OoFZ54J11+fdEW5T6EvInmtbl24774Y0XPuuXD11UlXlNsyCn0z62FmS8xsqZkN3szrA8xstZm9nPo4Ne21k8zsjdTHSdksXkQEYlbOiRPh+ONh8GAYNizpinJX7YoOMLMSYDTQDVgBzDWzKe5efpmDSe4+qNy52wOXAGWAA/NT536YlepFRFJq14Y774Q6deDii2Ht2gh/s6Qryy0Vhj7QDljq7ssAzGwi0BvIZG2b7sB0d1+TOnc60APQ6FoRybqSEhg/Pu78L788VuK6+moFf7pMQr8ZsDxtewVwwGaOO8rM2gOvA+e6+/JvObdZ+RPNbCAwEKBly5aZVS4ishm1asEtt0TwX3ttBP911yn4N8lWR+7DQCt33xuYDlTqcQl3H+PuZe5e1qRJkyyVJCLFqlYtGDUqOnZvuAHOOAM2bky6qtyQyZ3+SqBF2nbz1L6vufsHaZtjgWvSzu1Y7tzZlS1SRKSyzGDEiBjdc9VVccc/Zkw0ARWzTEJ/LtDGzFoTId4X6Jd+gJk1dfdVqc1ewOLU59OAK8ysUWr7UGBIlasWEcmAGVxxRTT1DB0awT9+fHT6FqsKL93dN5jZICLAS4Bx7r7QzIYC89x9CnC2mfUCNgBrgAGpc9eY2TDiFwfA0E2duiIiNcEMLrssgv8Pf4D16/87yqcYmbsnXcP/KCsr83nz5iVdhogUoOHD4fzzoU+fGNdfWpp0RdljZvPdvayi4/RErogUjfPOi47dBx6IJ3i//DLpimqeQl9EisrZZ8PNN8Mjj0Dv3vDFF0lXVLMU+iJSdH75S7j11liQ5Ygj4LPPkq6o5ij0RaQo/eIXcMcdMHs2HHYYfPJJ0hXVDIW+iBSt/v3hnnvg73+Pefk//jjpiqqfQl9Eitpxx8XUzPPmQdeusKbAB5Ur9EWk6PXpA5MnwyuvQJcu8P77SVdUfRT6IiJEh+6UKfDaa9CpE7z7btIVVQ+FvohISvfu8OijsGwZdOwI//pX0hVln0JfRCRN587w+OOwYgV06ADLl1d8Tj5R6IuIlHPIIfDEE/DeexH8b72VdEXZo9AXEdmMgw6CGTPgww+hfXtYujTpirJDoS8i8i3KymDWLPj887jjX7Ik6YqqTqEvIvId2raNp3Y3bIjgX7gw6YqqRqEvIlKBvfaCp56KZRg7doQFC5KuaMsp9EVEMrD77hH89erFOP7585OuaMso9EVEMtSmDcyZA9tsE0/uPv980hVVnkJfRKQSWreO4G/cGLp1g2eeSbqiylHoi4hUUsuW0dSz007Qo0d09OYLhb6IyBZo1izCfued4fDDY0GWfKDQFxHZQt/7XgR/mzbQsydMnZp0RRVT6IuIVEGTJjBzJuy5Jxx5JDz0UNIVfTeFvohIFe2wQ0zZsN9+cPTRsShLrlLoi4hkwXbbxSRtBx4IffvGMoy5SKEvIpIl22wDjz0WE7T17w+33ZZ0Rd+k0BcRyaKGDWMhlq5d4eSTYcyYpCv6Xwp9EZEsq18/ll48/HA4/XQYNSrpiv5LoS8iUg3q1YvF1nv3hrPOgpEjk64oKPRFRKpJ3boxkueYY+C3v4Urr0y6ogxD38x6mNkSM1tqZoO/47ijzMzNrCy1XcfMbjezf5jZYjMbkq3CRUTyQZ06MZKnXz+48EK47DJwT66e2hUdYGYlwGigG7ACmGtmU9x9UbnjtgbOAdLnnTsGqOvuPzSz+sAiM5vg7m9l6wJERHJd7dpwxx1QWgqXXgrr1sEf/whmCdSSwTHtgKXuvgzAzCYCvYFF5Y4bBlwNnJ+2z4EGZlYb2ApYB/ynqkWLiOSbkhK49dYI/iuugLVr4dpraz74M2neaQYsT9tekdr3NTPbD2jh7o+WO/d+4DNgFfAOMNzd12x5uSIi+atWLfjzn2HQIBgxAs45p+abejK50/9OZlYLGAkM2MzL7YCvgJ2ARsDTZvbkpr8a0r7GQGAgQMuWLatakohIzjKDG2+MTt4RI+KO/+ab4xdCTcgk9FcCLdK2m6f2bbI1sBcw2+LvlO8BU8ysF9APeNzd1wPvmdnfgDLgf0Lf3ccAYwDKysoS7OIQEal+ZtG0U1oaI3rWrYOxY6MJqLpl8rtlLtDGzFqbWSnQF5iy6UV3/9jdG7t7K3dvBTwH9HL3eUSTTmcAM2sAHAi8luVrEBHJO2Zw+eXRsXvbbXDiibBhQ/V/3wrv9N19g5kNAqYBJcA4d19oZkOBee4+5TtOHw2MN7OFgAHj3f2VbBQuIpLvzOCSS+KO/8ILYf16mDCheu/4M2rTd/epwNRy+y7+lmM7pn3+KTFsU0REvsWQIdHG/+GH1d/EU+WOXBERqbrf/KZmvo+mYRARKSIKfRGRIqLQFxEpIgp9EZEiotAXESkiCn0RkSKi0BcRKSIKfRGRImKe5BIum2Fmq4G3q/AlGgPvZ6mcJBXKdYCuJVcVyrUUynVA1a5lZ3dvUtFBORf6VWVm89y9LOk6qqpQrgN0LbmqUK6lUK4DauZa1LwjIlJEFPoiIkWkEEN/TNIFZEmhXAfoWnJVoVxLoVwH1MC1FFybvoiIfLtCvNMXEZFvkbehb2YtzGyWmS0ys4Vmdk5q//ZmNt3M3kj92yjpWitiZvXM7AUzW5C6lstS+1ub2fNmttTMJqWWq8x5ZlZiZi+Z2SOp7Xy9jrfM7B9m9rKZzUvty7v3F4CZbWdm95vZa2a22MwOysdrMbMfpP4/Nn38x8x+nafXcm7q5/1VM5uQyoFq/1nJ29AHNgC/dfc9iLV3zzSzPYDBwAx3bwPMSG3nurVAZ3ffB2gL9DCzA4GrgevcfVfgQ+CUBGusjHOAxWnb+XodAJ3cvW3aMLp8fH8B3AA87u67A/sQ/z95dy3uviT1/9EW+BHwOfAAeXYtZtab1YAGAAADA0lEQVQMOBsoc/e9iKVo+1ITPyvuXhAfwENAN2AJ0DS1rymwJOnaKnkd9YEXgQOIhzRqp/YfBExLur4M6m9O/NB1Bh4h1kbOu+tI1foW0Ljcvrx7fwHbAv8k1YeXz9dSrv5Dgb/l47UAzYDlwPbECoaPAN1r4mcln+/0v2ZmrYB9geeBHd19VeqlfwM7JlRWpaSaRF4G3gOmA28CH7n7htQhK4g3Sq67HrgA2Jja3oH8vA4AB54ws/lmNjC1Lx/fX62B1cD4VLPbWDNrQH5eS7q+wITU53l1Le6+EhgOvAOsAj4G5lMDPyt5H/pm1hD4K/Brd/9P+msevy7zYniSu3/l8Sdrc6AdsHvCJVWamR0BvOfu85OuJUsOdvf9gMOI5sP26S/m0furNrAfcLO77wt8Rrnmjzy6FgBSbd29gPvKv5YP15Lqc+hN/ELeCWgA9KiJ753XoW9mdYjAv9vdJ6d2v2tmTVOvNyXunPOGu38EzCL+tNvOzDYtXt8cWJlYYZn5CdDLzN4CJhJNPDeQf9cBfH03hru/R7QbtyM/318rgBXu/nxq+37il0A+XssmhwEvuvu7qe18u5auwD/dfbW7rwcmEz8/1f6zkrehb2YG3AosdveRaS9NAU5KfX4S0daf08ysiZltl/p8K6JvYjER/kenDsv5a3H3Ie7e3N1bEX96z3T3E8iz6wAwswZmtvWmz4n241fJw/eXu/8bWG5mP0jt6gIsIg+vJc3x/LdpB/LvWt4BDjSz+qks2/R/Uu0/K3n7cJaZHQw8DfyD/7YfX0i0698LtCRm6zzW3dckUmSGzGxv4HaiB78WcK+7DzWzXYg75u2Bl4D+7r42uUozZ2YdgfPc/Yh8vI5UzQ+kNmsD97j75Wa2A3n2/gIws7bAWKAUWAacTOq9Rv5dSwMiNHdx949T+/Lu/yU1NPs4YiTiS8CpRBt+tf6s5G3oi4hI5eVt846IiFSeQl9EpIgo9EVEiohCX0SkiCj0RUSKiEJfRKSIKPRFRIqIQl9EpIj8f+RLOlODBai5AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7ff14dbb8940>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.plot(Ks, np.array(CH_scores), 'b-')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MiniBatchKMeans(batch_size=100, compute_labels=True, init='k-means++',\n",
       "        init_size=None, max_iter=100, max_no_improvement=10, n_clusters=40,\n",
       "        n_init=3, random_state=None, reassignment_ratio=0.01, tol=0.0,\n",
       "        verbose=0)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_clusters = 40\n",
    "mb_kmeans = MiniBatchKMeans(n_clusters=n_clusters)\n",
    "mb_kmeans.fit(df_FE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_FE['cluster_40'] = mb_kmeans.predict(df_FE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_FE.to_csv('users_predict.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>LocaleId</th>\n",
       "      <th>BirthYearInt</th>\n",
       "      <th>GenderId</th>\n",
       "      <th>JoinedYearMonth</th>\n",
       "      <th>TimezoneInt</th>\n",
       "      <th>cluster_40</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000036</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.000030</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000019</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>-0.000018</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.000030</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000016</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000027</td>\n",
       "      <td>0.000038</td>\n",
       "      <td>0.000026</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   LocaleId  BirthYearInt  GenderId  JoinedYearMonth  TimezoneInt  cluster_40\n",
       "0  0.000027      0.000027  0.000019         0.000026     0.000036           1\n",
       "1  0.000027      0.000027  0.000019         0.000026     0.000031          12\n",
       "2  0.000030      0.000027  0.000019         0.000026    -0.000018           7\n",
       "3  0.000030      0.000027  0.000038         0.000027     0.000016          11\n",
       "4  0.000027      0.000027  0.000038         0.000026     0.000031          26"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_FE.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  },
  "toc": {
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": "block",
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
